{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d9515c08-c3cc-4b67-8298-cb5e6da8fae8",
   "metadata": {},
   "outputs": [],
   "source": [
    "!python -m pip install duckdb"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "de891a8f-ef46-418e-81f1-bb400a20ffe0",
   "metadata": {},
   "source": [
    "# Getting Started With DuckDB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "542e9da3-c459-45de-84d6-7b6287422e4e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import duckdb\n",
    "\n",
    "duckdb.sql(\"SELECT 'whistling_duck' AS waterfowl, 'whistle' AS call\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3c9020b1-a8b7-4dde-a70d-7ddc35d23dac",
   "metadata": {},
   "source": [
    "## Creating a Database From a Data Source"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2ed567c9-06e3-4793-a43e-a6a9a985b4a8",
   "metadata": {},
   "outputs": [],
   "source": [
    "import duckdb\n",
    "\n",
    "conn = duckdb.connect(database=\"presidents.db\")\n",
    "\n",
    "presidents_relation = conn.read_parquet(\"presidents.parquet\")\n",
    "\n",
    "conn.sql(\n",
    "    \"\"\"\n",
    "    SELECT sequence, last_name, first_name\n",
    "    FROM presidents_relation\n",
    "    WHERE sequence <= 2\n",
    "    \"\"\"\n",
    ").show()\n",
    "\n",
    "presidents_relation.to_table(\"presidents\")\n",
    "\n",
    "conn.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "88f82cd9-9056-4cdc-9ff1-9827d0900792",
   "metadata": {},
   "outputs": [],
   "source": [
    "with duckdb.connect(database=\"presidents.db\") as conn:\n",
    "    conn.sql(\n",
    "        \"\"\"\n",
    "        SELECT last_name, first_name\n",
    "        FROM presidents\n",
    "        WHERE last_name = 'Adams' \n",
    "        \"\"\"\n",
    "    ).show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a4d1b371-f16e-4a81-85fe-37411b3b2824",
   "metadata": {},
   "source": [
    "**This code won't work.**\n",
    "\n",
    "```python\n",
    "with duckdb.connect(database=\"presidents.db\") as conn:\n",
    "    conn.sql(\"SELECT * FROM presidents_relation\")\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d87920b4-cf7b-462b-8f89-7ab40d6ba8fa",
   "metadata": {},
   "source": [
    "## Correcting Your Data Import Errors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "491262f1-8841-459a-9fff-4d9b23caa4bb",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import duckdb\n",
    "\n",
    "with duckdb.connect(database=\"presidents.db\") as conn:\n",
    "    presidents_relation = conn.read_csv(\"presidents.csv\")\n",
    "    print(presidents_relation.limit(2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f1b48835-4f78-4be1-93b9-4b563a91238b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import duckdb\n",
    "\n",
    "with duckdb.connect(database=\"presidents.db\") as conn:\n",
    "    presidents_relation = conn.read_csv(\n",
    "        \"presidents.csv\", date_format=\"%B %d %Y\"\n",
    "    )\n",
    "    print(presidents_relation.dtypes)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "281fc954-cbe8-4e13-bc35-13ead7d28fcb",
   "metadata": {},
   "source": [
    "# Querying the Database\n",
    "## Querying Tables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d8342da7-a20e-49f7-a156-18b875734f52",
   "metadata": {},
   "outputs": [],
   "source": [
    "import duckdb\n",
    "\n",
    "with duckdb.connect(database=\"presidents.db\") as conn:\n",
    "    conn.read_json(\"parties.json\").to_table(\"parties\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3b795c6e-df4b-4fd7-b6cd-aff308740c1c",
   "metadata": {},
   "outputs": [],
   "source": [
    "with duckdb.connect(\"presidents.db\") as conn:\n",
    "    print(\n",
    "        conn.sql(\n",
    "            \"\"\"\n",
    "        SELECT first_name, last_name, party_name\n",
    "        FROM presidents\n",
    "        JOIN parties\n",
    "        ON presidents.party_id = parties.party_id\n",
    "        WHERE party_name = 'Whig'\n",
    "        ORDER BY last_name DESC\n",
    "        \"\"\"\n",
    "        )\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5e7da17a-6653-4c6b-85e9-61bbaad49ce5",
   "metadata": {},
   "source": [
    "## Querying Relations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11437a3f-c64e-4ba1-ab14-6d98f808c5f1",
   "metadata": {},
   "outputs": [],
   "source": [
    "import duckdb\n",
    "\n",
    "presidents = duckdb.read_parquet(\"presidents.parquet\")\n",
    "parties = duckdb.read_json(\"parties.json\")\n",
    "\n",
    "duckdb.sql(\n",
    "    \"\"\"\n",
    "    SELECT first_name, last_name, party_name\n",
    "    FROM presidents\n",
    "    JOIN parties\n",
    "    ON presidents.party_id = parties.party_id\n",
    "    WHERE party_name = 'Whig'\n",
    "    ORDER BY last_name DESC\n",
    "    \"\"\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f2223996-ed7f-49cb-8aa7-34c4e4cf45c6",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import duckdb\n",
    "\n",
    "presidents = duckdb.read_parquet(\"presidents.parquet\").set_alias(\"presidents\")\n",
    "parties = duckdb.read_json(\"parties.json\").set_alias(\"parties\")\n",
    "\n",
    "(\n",
    "    presidents.join(parties, \"presidents.party_id = parties.party_id\")\n",
    "    .select(\"first_name\", \"last_name\", \"party_name\")\n",
    "    .filter(\"party_name = 'Whig'\")\n",
    "    .order(\"last_name DESC\")\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7fffdaf5-447c-4918-9809-0a8f50620b05",
   "metadata": {},
   "outputs": [],
   "source": [
    "leaders = duckdb.read_parquet(\"presidents.parquet\").set_alias(\"usa_presidents\")\n",
    "faction = duckdb.read_json(\"parties.json\").set_alias(\"political_parties\")\n",
    "\n",
    "(\n",
    "    leaders.join(\n",
    "        faction, \"usa_presidents.party_id = political_parties.party_id\"\n",
    "    )\n",
    "    .select(\"first_name\", \"last_name\", \"party_name\")\n",
    "    .filter(\"party_name = 'Whig'\")\n",
    "    .order(\"last_name DESC\")\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8c300c6e-1b5e-41e6-bbdf-04ec2ce04b8c",
   "metadata": {},
   "source": [
    "# Using Concurrency\n",
    "## Performing Concurrent Reads"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "03637669-43fb-4593-91e6-c44b425399b3",
   "metadata": {},
   "outputs": [],
   "source": [
    "from concurrent.futures import ThreadPoolExecutor\n",
    "\n",
    "import duckdb\n",
    "\n",
    "\n",
    "def read_data(thread_id):\n",
    "    print(f\"Thread {thread_id} starting its read.\")\n",
    "    with duckdb.connect(\"presidents.db\") as conn:\n",
    "        conn.sql(\n",
    "            \"\"\"\n",
    "            SELECT first_name, last_name\n",
    "            FROM presidents\n",
    "            WHERE sequence = 1\n",
    "            \"\"\"\n",
    "        ).show()\n",
    "    print(f\"Thread {thread_id} ending its read.\")\n",
    "\n",
    "\n",
    "def concurrent_read():\n",
    "    with ThreadPoolExecutor(max_workers=3) as executor:\n",
    "        executor.map(read_data, range(3))\n",
    "\n",
    "\n",
    "concurrent_read()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c231f1c1-8aa4-4efe-b54e-4c0ff53d1d7e",
   "metadata": {},
   "source": [
    "## Performing Concurrent Writes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "65384b11-a04d-456a-956d-49ccc5057bf4",
   "metadata": {},
   "outputs": [],
   "source": [
    "import duckdb\n",
    "\n",
    "\n",
    "def update_data(thread_id):\n",
    "    new_name = f\"George ({thread_id})\"\n",
    "    with duckdb.connect(\"presidents.db\") as conn:\n",
    "        print(f\"Thread {thread_id} starting its update.\")\n",
    "        conn.sql(\n",
    "            f\"\"\"\n",
    "            UPDATE presidents\n",
    "            SET first_name = '{new_name}'\n",
    "            WHERE sequence = 1\n",
    "            \"\"\"\n",
    "        )\n",
    "        print(f\"Thread {thread_id} ending its update.\")\n",
    "\n",
    "\n",
    "def concurrent_update():\n",
    "    with ThreadPoolExecutor(max_workers=3) as executor:\n",
    "        executor.map(update_data, range(3))\n",
    "\n",
    "\n",
    "concurrent_update()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "892a7a41-89b2-4ceb-8e6c-221d417520d0",
   "metadata": {},
   "outputs": [],
   "source": [
    "with duckdb.connect(\"presidents.db\") as conn:\n",
    "    print(\n",
    "        conn.sql(\n",
    "            \"\"\"\n",
    "            SELECT last_name, first_name\n",
    "            FROM presidents\n",
    "            WHERE sequence = 1\n",
    "            \"\"\"\n",
    "        )\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bd5f57cf-b8a0-4833-9675-bcc7a6907124",
   "metadata": {},
   "source": [
    "# Integrating DuckDB Within Your Python Environment\n",
    "## Creating Python Functions for DuckDB to Use"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4ecd1c32-608a-43a1-a3fd-25519585a4e5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import duckdb\n",
    "\n",
    "\n",
    "def short_name(first_name: str, last_name: str) -> str:\n",
    "    return f\"{first_name[0]}. {last_name}\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "49f50c03-2557-4637-98c9-e114a47de8f9",
   "metadata": {},
   "outputs": [],
   "source": [
    "short_name(\"Abraham\", \"Lincoln\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b2772cd3-0f3e-48ab-99c2-949468cb57e8",
   "metadata": {},
   "source": [
    "**Only run this code if you need to update your existing `short_name()` function**\n",
    "\n",
    "```python\n",
    "duckdb.remove_function(\"short_name\")\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e001fe4a-498a-496e-901d-871b922e4231",
   "metadata": {},
   "outputs": [],
   "source": [
    "duckdb.create_function(\"short_name\", short_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c6a94bdc-4a89-41b3-921a-8142b43dcebe",
   "metadata": {},
   "outputs": [],
   "source": [
    "presidents = duckdb.read_parquet(\"presidents.parquet\")\n",
    "\n",
    "duckdb.sql(\n",
    "    \"\"\" \n",
    "    SELECT short_name(first_name::VARCHAR, last_name::VARCHAR) AS name,\n",
    "    (term_end - term_start) AS \"days in office\"\n",
    "    FROM presidents\n",
    "    \"\"\"\n",
    ").limit(3)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2181faed-0d54-470c-9174-f8f0a681069f",
   "metadata": {},
   "source": [
    "# Using Polars and pandas With DuckDB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3934e0e3-7875-40ad-9882-a06c87030c6f",
   "metadata": {},
   "outputs": [],
   "source": [
    "!python -m pip install pandas polars pyarrow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "77a11ef6-308e-4279-94f1-a38b1b046874",
   "metadata": {},
   "outputs": [],
   "source": [
    "import duckdb\n",
    "\n",
    "with duckdb.connect(\"presidents.db\") as conn:\n",
    "    pandas_presidents = conn.sql(\n",
    "        \"\"\"\n",
    "        SELECT last_name, first_name\n",
    "        FROM presidents\n",
    "        WHERE sequence BETWEEN 2 AND 5\n",
    "        \"\"\"\n",
    "    ).df()\n",
    "\n",
    "pandas_presidents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d348d5dc-ff6a-4a61-9924-24ff345a63bd",
   "metadata": {},
   "outputs": [],
   "source": [
    "import duckdb\n",
    "\n",
    "presidents = duckdb.read_parquet(\"presidents.parquet\").set_alias(\"presidents\")\n",
    "parties = duckdb.read_json(\"parties.json\").set_alias(\"parties\")\n",
    "\n",
    "(\n",
    "    presidents.join(parties, \"presidents.party_id = parties.party_id\")\n",
    "    .select(\"first_name\", \"last_name\", \"party_name\")\n",
    "    .order(\"last_name DESC\")\n",
    ").pl().head(3)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
